{
 "nbformat": 4,
 "nbformat_minor": 0,
 "metadata": {
  "colab": {
   "private_outputs": true,
   "provenance": [],
   "include_colab_link": true
  },
  "kernelspec": {
   "name": "python3",
   "display_name": "Python 3"
  },
  "language_info": {
   "name": "python"
  }
 },
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "view-in-github",
    "colab_type": "text"
   },
   "source": [
    "<a href=\"https://colab.research.google.com/github/alexfazio/crewAI-quickstart/blob/main/crewai_sequential_DOCXSearchTool_quickstart.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
   ]
  },
  {
   "cell_type": "markdown",
   "source": [
    "# crewai-sequential-DOCXSearchTool-quickstart\n",
    "\n",
    "By [Alex Fazio](https://www.x.com/alxfazio)\n",
    "\n",
    "📂 Github repo: https://github.com/alexfazio/crewai-quickstart\n",
    "\n",
    "This is a simplified and tested template of a **sequential** CrewAI crew.\n",
    "\n",
    "This team utilizes the `DOCXSearchTool` to conduct semantic **searches** within the content of DOCX files.\n",
    "\n",
    "Check the folder named `./output-files` for detailed results from each task or agent.\n",
    "\n",
    "Extra Requirements:\n",
    "- [OpenAI](https://platform.openai.com/playground)/[Groq](https://console.groq.com/settings/organization)/[Anthropic](https://console.anthropic.com/dashboard) API Key\n",
    "\n",
    "📚 CrewAI docs: https://docs.crewai.com/\n",
    "\n",
    "📚 DOCXSearchTool docs: https://docs.crewai.com/tools/DOCXSearchTool/\n",
    "\n",
    "📚 `chromadb` (embedding database) docs:  https://docs.trychroma.com/"
   ],
   "metadata": {
    "id": "ANCxcFs-qVl4"
   }
  },
  {
   "metadata": {
    "cellView": "form",
    "id": "B8D0bWf5v_jn"
   },
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "# @title 👨‍🦯 Run this cell to hide all warnings (optional)\n",
    "# Warning control\n",
    "import warnings\n",
    "warnings.filterwarnings('ignore')\n",
    "\n",
    "# To avoid the restart session warning in Colab, exclude the PIL and\n",
    "# pydevd_plugins packages from being imported. This is fine because\n",
    "# we didn't execute the code in the kernel session afterward.\n",
    "\n",
    "# import sys\n",
    "# sys.modules.pop('PIL', None)"
   ]
  },
  {
   "cell_type": "code",
   "source": [
    "# @title ⬇️ Install project dependencies by running this cell\n",
    "# @markdown  🔄 Restart the session and rerun the cell **if Colab requires it**.\n",
    "\n",
    "!pip install git+https://github.com/joaomdmoura/crewAI.git --quiet\n",
    "!pip install crewai_tools langchain_openai langchain_groq langchain_anthropic langchain_community cohere --quiet\n",
    "print(\"---\")\n",
    "!pip show crewAI crewai_tools langchain_openai langchain_groq langchain_anthropic langchain_community cohere"
   ],
   "metadata": {
    "id": "P8iHNKCfk9Rv",
    "cellView": "form"
   },
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "BhAt-unGk4kA",
    "cellView": "form"
   },
   "outputs": [],
   "source": [
    "# @title 🔑 Input API Key by running this cell\n",
    "\n",
    "import os\n",
    "from getpass import getpass\n",
    "from crewai import Agent, Task, Crew, Process\n",
    "from textwrap import dedent\n",
    "from langchain_openai import ChatOpenAI\n",
    "# ↑ uncomment to use OpenAI's API\n",
    "# from langchain_groq import ChatGroq\n",
    "# ↑ uncomment to use Groq's API\n",
    "# from langchain_anthropic import ChatAnthropic\n",
    "# ↑ uncomment to use Antrhopic's API\n",
    "# from langchain_community.chat_models import ChatCohere\n",
    "# ↑ uncomment to use ChatCohere API\n",
    "\n",
    "os.environ[\"OPENAI_API_KEY\"] = getpass(\"Enter OPENAI_API_KEY: \")\n",
    "# ↑ uncomment to use OpenAI's API\n",
    "# os.environ[\"GROQ_API_KEY\"] = getpass(\"Enter GROQ_API_KEY: \")\n",
    "# ↑ uncomment to use Groq's API\n",
    "# os.environ[\"ANTHROPIC_API_KEY\"] = getpass(\"Enter ANTHROPIC_API_KEY: \")\n",
    "# ↑ uncomment to use Anthropic's API\n",
    "# os.environ[\"COHERE_API_KEY\"] = getpass(\"Enter COHERE_API_KEY: \")\n",
    "# ↑ uncomment to use Cohere's API\n",
    "\n",
    "# Check if the 'output-files' directory exists, and create it if it doesn't\n",
    "if not os.path.exists('output-files'):\n",
    "    os.makedirs('output-files')"
   ]
  },
  {
   "cell_type": "code",
   "source": [
    "# @title ⬇📇 Download Sample `DOCX` Document\n",
    "\n",
    "import os\n",
    "import requests\n",
    "\n",
    "# Specify the folder path where you want to save the file\n",
    "folder_path = 'dataset'\n",
    "\n",
    "# Create the folder if it doesn't exist\n",
    "if not os.path.exists(folder_path):\n",
    "    os.makedirs(folder_path)\n",
    "\n",
    "# Sample .pdf data from Arxiv\n",
    "docx_url = 'https://www.doe.mass.edu/mcas/accessibility/ell-bilingual.docx' # @param {type:\"string\"}\n",
    "response = requests.get(docx_url)\n",
    "\n",
    "# Extract the filename from the URL\n",
    "filename = os.path.basename(docx_url)\n",
    "\n",
    "# Specify the file path including the folder and the extracted filename\n",
    "docx_file_path = os.path.join(folder_path, filename)\n",
    "\n",
    "with open(docx_file_path, 'wb') as file:\n",
    "    file.write(response.content)\n",
    "\n",
    "print(f\"`{filename}` downloaded and saved to: {docx_file_path}\")"
   ],
   "metadata": {
    "id": "jiwfyUxYLwTT",
    "cellView": "form"
   },
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "code",
   "source": [
    "# @title 📇 Instantiate `DOCXSearchTool`\n",
    "\n",
    "# @markdown By default, the tool uses OpenAI for both embeddings and summarization.\n",
    "\n",
    "# @markdown To customize the model, you can use a config dictionary. See how [here](https://docs.crewai.com/tools/DOCXSearchTool/#custom-model-and-embeddings).\n",
    "\n",
    "!pip install docx2txt\n",
    "\n",
    "from crewai_tools import DOCXSearchTool  # Updated import path\n",
    "\n",
    "# General DOCX content search\n",
    "\n",
    "# This approach is suitable when the DOCX path is either known beforehand or can be dynamically identified.\n",
    "\n",
    "# docx_search_tool = DOCXSearchTool()\n",
    "\n",
    "# Restricting search to a specific DOCX file\n",
    "\n",
    "# Use this initialization method when you want to limit the search scope to a specific DOCX file.\n",
    "\n",
    "docx_search_tool = DOCXSearchTool(docx=docx_file_path)"
   ],
   "metadata": {
    "id": "IENZ1rmviDZY",
    "cellView": "form"
   },
   "execution_count": null,
   "outputs": []
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": [
    "## Define Agents\n",
    "In CrewAI, agents are autonomous entities designed to perform specific roles and achieve particular goals. Each agent uses a language model (LLM) and may have specialized tools to help execute tasks."
   ]
  },
  {
   "cell_type": "code",
   "source": [
    "# @title 🕵🏻 Define your agents\n",
    "\n",
    "from langchain_community.chat_models import ChatOpenAI\n",
    "from crewai import Agent\n",
    "\n",
    "# from langchain_groq import ChatGroq\n",
    "# ↑ Uncomment to use Groq's API\n",
    "# from langchain_anthropic import ChatAnthropic\n",
    "# ↑ Uncomment to use Anthropic's API\n",
    "\n",
    "agent_1 = Agent(\n",
    "    role=dedent((\n",
    "        \"\"\"\n",
    "        Defines the agent's function within the crew. It determines the kind of tasks the agent is best suited for.\n",
    "        \"\"\")), # Think of this as the job title\n",
    "    backstory=dedent((\n",
    "        \"\"\"\n",
    "        Provides context to the agent's role and goal, enriching the interaction and collaboration dynamics.\n",
    "        \"\"\")), # This is the backstory of the agent, this helps the agent to understand the context of the task\n",
    "    goal=dedent((\n",
    "        \"\"\"\n",
    "        The individual objective that the agent aims to achieve. It guides the agent's decision-making process.\n",
    "        \"\"\")), # This is the goal that the agent is trying to achieve\n",
    "    tools=[docx_search_tool],\n",
    "    allow_delegation=False,\n",
    "    verbose=True,\n",
    "    # ↑ Whether the agent execution should be in verbose mode\n",
    "    max_iter=3,\n",
    "    # ↑ maximum number of iterations the agent can perform before being forced to give its best answer (generate the output)\n",
    "    max_rpm=100, # This is the maximum number of requests per minute that the agent can make to the language model\n",
    "    llm=ChatOpenAI(model_name=\"gpt-4o\", temperature=0.8)\n",
    "    # ↑ uncomment to use OpenAI API + \"gpt-4o\"\n",
    "    # llm=ChatGroq(temperature=0.8, model_name=\"mixtral-8x7b-32768\"),\n",
    "    # ↑ uncomment to use Groq's API + \"llama3-70b-8192\"\n",
    "    # llm=ChatGroq(temperature=0.6, model_name=\"llama3-70b-8192\"),\n",
    "    # ↑ uncomment to use Groq's API + \"mixtral-8x7b-32768\"\n",
    "    # llm = ChatAnthropic(model='claude-3-opus-20240229', temperature=0.8),\n",
    "    # ↑ uncomment to use Anthropic's API + \"claude-3-opus-20240229\"\n",
    ")\n",
    "\n",
    "agent_2 = Agent(\n",
    "    role=dedent((\n",
    "        \"\"\"\n",
    "        Defines the agent's function within the crew. It determines the kind of tasks the agent is best suited for.\n",
    "        \"\"\")), # Think of this as the job title\n",
    "    backstory=dedent((\n",
    "        \"\"\"\n",
    "        Provides context to the agent's role and goal, enriching the interaction and collaboration dynamics.\n",
    "        \"\"\")), # This is the backstory of the agent, this helps the agent to understand the context of the task\n",
    "    goal=dedent((\n",
    "        \"\"\"\n",
    "        The individual objective that the agent aims to achieve. It guides the agent's decision-making process.\n",
    "        \"\"\")), # This is the goal that the agent is trying to achieve\n",
    "    tools=[docx_search_tool],\n",
    "    allow_delegation=False,\n",
    "    verbose=True,\n",
    "    # ↑ Whether the agent execution should be in verbose mode\n",
    "    max_iter=3,\n",
    "    # ↑ maximum number of iterations the agent can perform before being forced to give its best answer (generate the output)\n",
    "    max_rpm=100, # This is the maximum number of requests per minute that the agent can make to the language model\n",
    "    llm=ChatOpenAI(model_name=\"gpt-4o\", temperature=0.8)\n",
    "    # ↑ uncomment to use OpenAI API + \"gpt-4o\"\n",
    "    # llm=ChatGroq(temperature=0.8, model_name=\"mixtral-8x7b-32768\"),\n",
    "    # ↑ uncomment to use Groq's API + \"llama3-70b-8192\"\n",
    "    # llm=ChatGroq(temperature=0.6, model_name=\"llama3-70b-8192\"),\n",
    "    # ↑ uncomment to use Groq's API + \"mixtral-8x7b-32768\"\n",
    "    # llm = ChatAnthropic(model='claude-3-opus-20240229', temperature=0.8),\n",
    "    # ↑ uncomment to use Anthropic's API + \"claude-3-opus-20240229\"\n",
    ")\n",
    "\n",
    "agent_3 = Agent(\n",
    "    role=dedent((\n",
    "        \"\"\"\n",
    "        Defines the agent's function within the crew. It determines the kind of tasks the agent is best suited for.\n",
    "        \"\"\")), # Think of this as the job title\n",
    "    backstory=dedent((\n",
    "        \"\"\"\n",
    "        Provides context to the agent's role and goal, enriching the interaction and collaboration dynamics.\n",
    "        \"\"\")), # This is the backstory of the agent, this helps the agent to understand the context of the task\n",
    "    goal=dedent((\n",
    "        \"\"\"\n",
    "        The individual objective that the agent aims to achieve. It guides the agent's decision-making process.\n",
    "        \"\"\")), # This is the goal that the agent is trying to achieve\n",
    "    tools=[docx_search_tool],\n",
    "    allow_delegation=False,\n",
    "    verbose=True,\n",
    "    # ↑ Whether the agent execution should be in verbose mode\n",
    "    max_iter=3,\n",
    "    # ↑ maximum number of iterations the agent can perform before being forced to give its best answer (generate the output)\n",
    "    max_rpm=100, # This is the maximum number of requests per minute that the agent can make to the language model\n",
    "    llm=ChatOpenAI(model_name=\"gpt-4o\", temperature=0.8)\n",
    "    # ↑ uncomment to use OpenAI API + \"gpt-4o\"\n",
    "    # llm=ChatGroq(temperature=0.8, model_name=\"mixtral-8x7b-32768\"),\n",
    "    # ↑ uncomment to use Groq's API + \"llama3-70b-8192\"\n",
    "    # llm=ChatGroq(temperature=0.6, model_name=\"llama3-70b-8192\"),\n",
    "    # ↑ uncomment to use Groq's API + \"mixtral-8x7b-32768\"\n",
    "    # llm = ChatAnthropic(model='claude-3-opus-20240229', temperature=0.8),\n",
    "    # ↑ uncomment to use Anthropic's API + \"claude-3-opus-20240229\"\n",
    ")"
   ],
   "metadata": {
    "id": "hZJwUoXasrhx",
    "cellView": "form"
   },
   "execution_count": null,
   "outputs": []
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": [
    "## Define Tasks\n",
    "Tasks in CrewAI are specific assignments given to agents, detailing the actions they need to perform to achieve a particular goal. Tasks can have dependencies and context, and can be executed asynchronously to ensure an efficient workflow."
   ]
  },
  {
   "cell_type": "code",
   "source": [
    "# @title 📝 Define your tasks\n",
    "\n",
    "import datetime\n",
    "from crewai import Task\n",
    "\n",
    "task_1 = Task(\n",
    "    description=dedent((\n",
    "        \"\"\"\n",
    "        A clear, concise statement of what the task entails.\n",
    "        ---\n",
    "        VARIABLE 1: \"{var_1}\"\n",
    "        VARIABLE 2: \"{var_2}\"\n",
    "        VARIABLE 3: \"{var_3}\"\n",
    "        Add more variables if needed...\n",
    "        \"\"\")),\n",
    "    expected_output=dedent((\n",
    "        \"\"\"\n",
    "        A detailed description of what the task's completion looks like.\n",
    "        \"\"\")),\n",
    "    agent=agent_1,\n",
    "    output_file=f'output-files/agent_1-output_{datetime.datetime.now().strftime(\"%Y%m%d_%H%M%S\")}.md'\n",
    "    # ↑ The output of each task iteration will be saved here\n",
    ")\n",
    "\n",
    "task_2 = Task(\n",
    "    description=dedent((\n",
    "        \"\"\"\n",
    "        A clear, concise statement of what the task entails.\n",
    "        ---\n",
    "        VARIABLE 1: \"{var_1}\"\n",
    "        VARIABLE 2: \"{var_2}\"\n",
    "        VARIABLE 3: \"{var_3}\"\n",
    "        Add more variables if needed...\n",
    "        \"\"\")),\n",
    "    expected_output=dedent((\n",
    "        \"\"\"\n",
    "        A detailed description of what the task's completion looks like.\n",
    "        \"\"\")),\n",
    "    agent=agent_2,\n",
    "    context=[task_1],\n",
    "    # ↑ specify which task's output should be used as context for subsequent tasks\n",
    "    output_file=f'output-files/agent_2-output_{datetime.datetime.now().strftime(\"%Y%m%d_%H%M%S\")}.md'\n",
    "    # ↑ The output of each task iteration will be saved here\n",
    ")\n",
    "\n",
    "task_3 = Task(\n",
    "    description=dedent((\n",
    "        \"\"\"\n",
    "        A clear, concise statement of what the task entails.\n",
    "        ---\n",
    "        VARIABLE 1: \"{var_1}\"\n",
    "        VARIABLE 2: \"{var_2}\"\n",
    "        VARIABLE 3: \"{var_3}\"\n",
    "        Add more variables if needed...\n",
    "        \"\"\")),\n",
    "    expected_output=dedent((\n",
    "        \"\"\"\n",
    "        A detailed description of what the task's completion looks like.\n",
    "        \"\"\")),\n",
    "    agent=agent_3,\n",
    "    context=[task_2],\n",
    "    # ↑ specify which task's output should be used as context for subsequent tasks\n",
    "    output_file=f'output-files/agent_3-output_{datetime.datetime.now().strftime(\"%Y%m%d_%H%M%S\")}.md'\n",
    "    # ↑ The output of each task iteration will be saved here\n",
    ")"
   ],
   "metadata": {
    "id": "dqtn3w1qs-Bu",
    "cellView": "form"
   },
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "code",
   "source": [
    "# @title ⌨️ Define any variables you have and input them\n",
    "print(\"## Welcome to the YOUR_CREW_NAME\")\n",
    "print('-------------------------------------------')\n",
    "var_1 = input(\"What is the  to pass to your crew?\\n\"),\n",
    "var_2 = input(\"What is the  to pass to your crew?\\n\"),\n",
    "var_3 = input(\"What is the  to pass to your crew?\\n\"),\n",
    "print(\"-------------------------------\")"
   ],
   "metadata": {
    "id": "HfJRdGHesMwn",
    "cellView": "form"
   },
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "code",
   "source": [
    "# @title 🚀 Get your crew to work!\n",
    "\n",
    "from crewai import Crew, Process\n",
    "\n",
    "def main():\n",
    "    # Instantiate your crew with a sequential process\n",
    "    crew = Crew(\n",
    "        agents=[agent_1, agent_2, agent_3],\n",
    "        tasks=[task_1, task_2, task_3],\n",
    "        verbose=True,  # You can set it to True or False\n",
    "        # ↑ indicates the verbosity level for logging during execution.\n",
    "        process=Process.sequential\n",
    "        # ↑ the process flow that the crew will follow (e.g., sequential, hierarchical).\n",
    "    )\n",
    "\n",
    "    inputs = {\n",
    "    \"var_1\": var_1,\n",
    "    \"var_2\": var_2,\n",
    "    \"var_3\": var_3\n",
    "    }\n",
    "\n",
    "    result = crew.kickoff(inputs=inputs)\n",
    "    print(\"\\n\\n########################\")\n",
    "    print(\"## Here is your custom crew run result:\")\n",
    "    print(\"########################\\n\")\n",
    "    print(result)\n",
    "    \n",
    "    return result\n",
    "\n",
    "if __name__ == \"__main__\":\n",
    "  result = main()"
   ],
   "metadata": {
    "id": "nrBn8dMlxfCn",
    "cellView": "form"
   },
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "code",
   "source": [
    "# @title 🖥️ Display the results of your crew as markdown\n",
    "from IPython.display import display, Markdown\n",
    "\n",
    "markdown_text = result.raw  # Adjust this based on the actual attribute\n",
    "\n",
    "# Display the markdown content\n",
    "display(Markdown(markdown_text))"
   ],
   "metadata": {
    "cellView": "form",
    "id": "ItvUFDNGxMu6"
   },
   "execution_count": null,
   "outputs": []
  }
 ]
}
